libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';


*This program contains the code needed to create Figure 6 of the paper;
*The program reads taq_trading, which is constructed in the Daily Taq (Intermediate File) code;



*trading measures are already percentiles ranging from 0 to 99 (see Daily Taq program), so just create extreme-percentile indicators;


/* Build the working dataset TAQ from mylib.taq_trading:
     - drop rows where any of six required trading variables equals missing (.)
     - add 0/1 indicators for the extreme values of std_retail_trades121
     - add net_dd2_pre as net_dd2 minus net_dd2_post                       */
data taq;
    set mylib.taq_trading;

    /* Row filter: all six variables must be non-missing to keep the row. */
    if retail_oib_trades       = .
       or inst_oib             = .
       or retail_oib           = .
       or l1_std_retail_trades121 = .
       or l1_std_retail_vol121    = .
       or l1_std_inst_vol121      = .
    then delete;

    /* Upper-tail indicators. A comparison evaluates to 1 when true and 0
       otherwise, so a missing std_retail_trades121 yields 0.              */
    retail99 = (std_retail_trades121 >= 99);
    retail95 = (std_retail_trades121 >= 95);
    retail90 = (std_retail_trades121 >= 90);

    /* Lower-tail indicators: value must lie in the closed range [0, cutoff]. */
    retail0  = (0 <= std_retail_trades121 <= 0);
    retail5  = (0 <= std_retail_trades121 <= 4);
    retail10 = (0 <= std_retail_trades121 <= 9);

    /* Remainder of net_dd2 once the "post" component is removed. */
    net_dd2_pre = net_dd2 - net_dd2_post;
run;

/* Order TAQ by date in place; the later BY date step needs sorted input. */
proc sort data=taq;
    by date;
run;


/* Print default summary statistics for the two net_dd2 components. */
proc means data=taq;
    var net_dd2_pre
        NET_DD2_POST;
run;





/* Center every listed variable at mean zero within each date (MEAN=0 with
   BY date) and write the result to TAQ_FE. Only the mean is changed; no
   STD= option is given, so the scale of each variable is left as is.
   NOTE(review): presumably this within-date demeaning stands in for date
   fixed effects in the regressions that follow - confirm.
   Each variable is listed exactly once; the lagged (l1_) trading variables
   were previously repeated in the list.                                   */
proc standard data=taq out=taq_fe mean=0;
    by date;
    var
        /* contemporaneous trading measures */
        retail_oib_trades retail_oib inst_oib
        std_retail_trades121 std_retail_VOL121 std_INST_VOL121

        /* one-period lags of the trading measures */
        l1_retail_oib_trades l1_retail_oib l1_inst_oib
        l1_std_retail_trades121 l1_std_retail_VOL121 l1_std_INST_VOL121

        /* net_dd2 and its pre/post components */
        net_dd2 net_dd2_pre NET_DD2_POST

        /* remaining regressors */
        NON_RESEARCH2 NON_RESEARCH2_POST NET_SA2 NET_SA2_POST
        ln_size ln_bm mom1 mom5 mom6_26 lag_sent1 lag_sent5 lag_sent6_26
        abs_mom1 abs_mom5 abs_mom6_26 abs_lag_sent1 abs_lag_sent5 abs_lag_sent6_26
        heavy_news news_rank2
        news_missing size_missing bm_missing

        /* extreme-percentile indicators (the dependent variables) */
        retail99 retail95 retail90 retail0 retail5 retail10
    ;
run;


*code below gives estimates for the 1st percentile (retail0);
*replace retail0 with retail5, retail10, retail90, retail95, or retail99 for the other estimates;
*note - the estimates produced here are percentage point estimates. The estimates reported in the figure are
converted to percentage estimates by scaling by the mean of the dependent variable;







/* Regression setup for the three clustered PROC SURVEYREG runs below.
     yourdata : input dataset for the regressions
     firmid   : first clustering dimension
     time     : second clustering dimension
     y        : dependent variable (swap in retail5, retail10, retail90,
                retail95 or retail99 for the other estimates)
     x        : regressor list
   yourdata points at TAQ_FE, the demeaned dataset written by the
   PROC STANDARD step in this program. It previously named trading_fe,
   which this program never creates.                                       */
%let yourdata=taq_fe;

%let firmid=ticker;
%let time=cum_month;

%let y=retail0;
%let x=net_dd2_pre NET_DD2_POST
NON_RESEARCH2 NON_RESEARCH2_POST NET_SA2 NET_SA2_POST
ln_size ln_bm mom1 mom5 mom6_26 lag_sent1 lag_sent5 lag_sent6_26
abs_mom1 abs_mom5 abs_mom6_26 abs_lag_sent1 abs_lag_sent5 abs_lag_sent6_26
heavy_news news_rank2
l1_retail_oib_trades l1_retail_oib l1_inst_oib l1_std_retail_trades121 l1_std_retail_VOL121 l1_std_INST_VOL121
news_missing size_missing bm_missing;

/* Run 1 of 3: regress the dependent variable on the regressor list with
   clustering on the first dimension (firmid). The COVB option requests the
   coefficient covariance matrix, which ODS saves as dataset FIRM.         */
proc surveyreg data=&yourdata;
    ods output covb=firm;
    cluster &firmid;
    model &y = &x / covb;
run;


/* Run 2 of 3: same model, clustered on the second dimension (time).
   The coefficient covariance matrix is saved as dataset YEAR.             */
proc surveyreg data=&yourdata;
    ods output covb=year;
    cluster &time;
    model &y = &x / covb;
run;

/* Run 3 of 3: same model, clustered on the intersection of the two
   dimensions (each firmid-by-time combination is a cluster).
   Saves the covariance matrix as BOTH and the coefficient table as PARM.  */
proc surveyreg data=&yourdata;
    ods output covb=both
               parameterestimates=parm;
    cluster &firmid &time;
    model &y = &x / covb;
run;

/* Reduce PARM to the parameter name and its point estimate. */
data parm (keep=parameter estimate);
    set parm;
run;

/* PARM1: a single row whose only variable, n, is the number of rows in PARM
   (that is, the dimension of the coefficient covariance matrix). PROC IML
   uses n below to pull the diagonal out of that matrix.                    */
data parm1;
    set parm end=is_last_row;
    if is_last_row;   /* keep only the final observation ...              */
    n = _n_;          /* ... whose iteration number equals the row count  */
    keep n;
run;

/* Combine the three covariance matrices as firm + time - intersection, then
   take the square roots of the diagonal of the result as standard errors.
   The standard errors are written to dataset B in a column named stderr.
   Printed labels (Z, X, Y, n, B, G) match the matrix names used previously. */
proc iml;
    /* load the numeric columns of each saved covariance table */
    use both;
    read all var _num_ into cov_both;
    print cov_both[label="Z"];

    use firm;
    read all var _num_ into cov_firm;
    print cov_firm[label="X"];

    use year;
    read all var _num_ into cov_time;
    print cov_time[label="Y"];

    /* number of parameters = dimension of the covariance matrices */
    use parm1;
    read all var _num_ into n_parm;
    print n_parm[label="n"];

    /* combined covariance matrix */
    cov_twoway = cov_firm + cov_time - cov_both;

    /* Mask everything off the diagonal with an identity matrix, sum each row
       against a column of ones to get the diagonal as a vector, then raise
       to the power .5 for the standard errors.                             */
    diag_only = I(n_parm) # cov_twoway;
    se_vec    = (diag_only * J(n_parm, 1)) ## .5;

    print cov_twoway[label="B"];
    print se_vec[label="G"];

    create b from se_vec [colname='stderr'];
    append from se_vec;
quit;

/* RESULTS: parameter estimates (PARM) placed side by side with the standard
   errors (B) by row position, plus the t-statistic for each row.           */
data results;
    merge parm b;
    tstat = estimate / stderr;
run;

/* List the finished table. */
proc print data=results;
run;




/* Reshape RESULTS into a single column, estimate3, in which each estimate
   is followed by its t-statistic:
     - estimate rows carry obs = row number
     - t-stat rows carry obs = row number + .5
   SPEC3 receives both kinds of row; SPEC3B receives the t-stat rows only.  */
data spec3 spec3b;
    set results;
    keep parameter estimate3 obs;

    /* point estimate row */
    estimate3 = estimate;
    obs       = _n_;
    output spec3;

    /* t-statistic row, ordered directly after its estimate */
    estimate3 = tstat;
    obs       = _n_ + .5;
    output spec3 spec3b;
run;

/* Final ordering: estimate, t-stat, estimate, t-stat, ... */
proc sort data=spec3;
    by obs;
run;

